Analysis of first run of the benchmarking experiment

Author

György Barabás

Loading and tidying the data

We first set up some functions to load and tidy the raw data:

library(tidyverse)
library(broom)
library(ggfortify)
library(jsonlite)
library(knitr)
library(scales)
library(kableExtra)



serversFromConfig <- function(configFile = "../config.json") {
  # Read the benchmark config and return a tidy lookup table with one row
  # per (server, platform) pair, giving the download endpoint `ip` used by
  # that platform on that server.
  fromJSON(configFile) |>
    as_tibble() |>
    select(contains("dl")) |>
    # Label servers by row position instead of a hard-coded 1:3, so the
    # function keeps working if the config lists more or fewer than three
    # download servers (the original would recycle or error):
    mutate(server = str_c("Server ", row_number()), .before = 1) |>
    rename_with(\(x) str_remove(x, "_dl_servers"), !server) |>
    pivot_longer(!server, names_to = "platform", values_to = "ip") |>
    # Map the config's short platform keys onto display names:
    mutate(platform = case_match(
      platform,
      "swarm" ~ "Swarm",
      "ipfs"  ~ "IPFS",
      "arw"   ~ "Arweave"
    ))
}


dataFromJsonRaw <- function(jsonFile = "../results.json") {
  # Read the nested benchmark results and flatten them to one row per
  # individual download, with standardized column names.
  raw <- jsonlite::fromJSON(jsonFile) |>
    as_tibble()
  raw |>
    unnest(tests) |>
    unnest(results) |>
    rename(
      time_sec  = download_time_seconds,
      replicate = ref,
      platform  = storage
    )
}


dataFromJson <- function(rawTable) {
  # Tidy the raw results table: parse the checksum flag into a logical,
  # normalize the platform label, coerce file size to integer KB, and
  # attach the human-readable server names from the config file.
  rawTable |>
    mutate(
      sha256_match = (sha256_match == "true"),
      platform = ifelse(platform == "Ipfs", "IPFS", platform),
      size_kb = as.integer(size)
    ) |>
    select(-size, -server, -timestamp) |>
    left_join(serversFromConfig(), by = join_by(platform, ip)) |>
    # Put the most interesting columns right after the platform label:
    relocate(size_kb, server, time_sec, attempts, sha256_match,
             .after = platform)
}

After loading and tidying the data, here’s what the first few rows of the table look like:

# Load and tidy the benchmark data into the main analysis table:
dat <- dataFromJsonRaw() |>
  dataFromJson()

# Peek at the first ten rows:
dat |>
  slice_head(n = 10) |>
  kable()
platform size_kb server time_sec attempts sha256_match ip latitude longitude replicate
Swarm 1 Server 1 0.0000000 15 FALSE 5.9.50.180:8080 50.4779 12.3713 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27
Swarm 1 Server 2 0.1379130 1 TRUE 188.245.154.61:1633 49.4542 11.0775 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27
Swarm 1 Server 3 0.1654890 1 TRUE 188.245.177.151:1633 49.4542 11.0775 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27
Swarm 1 Server 1 0.0000000 15 FALSE 5.9.50.180:8080 50.4779 12.3713 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7
Swarm 1 Server 2 0.3394287 1 TRUE 188.245.154.61:1633 49.4542 11.0775 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7
Swarm 1 Server 3 0.1937695 1 TRUE 188.245.177.151:1633 49.4542 11.0775 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7
Swarm 1 Server 1 0.0000000 15 FALSE 5.9.50.180:8080 50.4779 12.3713 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56
Swarm 1 Server 2 0.3230913 1 TRUE 188.245.154.61:1633 49.4542 11.0775 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56
Swarm 1 Server 3 0.3515806 1 TRUE 188.245.177.151:1633 49.4542 11.0775 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56
Swarm 1 Server 1 0.0000000 15 FALSE 5.9.50.180:8080 50.4779 12.3713 7049fe5e08fd855c3b89788a317f51bf844c20ecf3c5f71f863d8a7c9ed2af0d

We can do some sanity checks. First of all, the experiment is well balanced, with 30 replicates per size, server, and platform:

# Count replicates within every (size, server, platform) cell, then
# tabulate how many cells share each replicate count; a single row means
# the design is perfectly balanced.
dat |>
  summarize(`number of replicates` = n(),
            .by = c(size_kb, server, platform)) |>
  count(`number of replicates`,
        name = "size-server-platform combinations") |>
  kable()
number of replicates size-server-platform combinations
30 45

And the replicates are also correctly assigned:

# Each (server, replicate hash) pair should occur exactly once; tabulate
# how often each occurrence count appears to confirm this.
dat |>
  summarize(`number of replicates` = n(),
            .by = c(server, replicate)) |>
  count(`number of replicates`,
        name = "server-replicate combinations") |>
  kable()
number of replicates server-replicate combinations
1 1350

Let us check if any of the sha256 matches failed:

# Tally verified versus failed checksum comparisons:
dat |>
  summarize(n = n(), .by = sha256_match) |>
  arrange(sha256_match) |>
  kable()
sha256_match n
FALSE 150
TRUE 1200

Indeed, there are 150 failures. Let us check where those failed attempts are:

# Locate the failures: tabulate the rows whose checksum did not match.
failures <- dat |>
  filter(!sha256_match)
failures |>
  count(platform, size_kb, server) |>
  kable()
platform size_kb server n
Swarm 1 Server 1 30
Swarm 10 Server 1 30
Swarm 100 Server 1 30
Swarm 1000 Server 1 30
Swarm 10000 Server 1 30

In short, all Swarm downloads on Server 1 have failed, and nothing else.

Those same failed downloads also always had 15 download attempts. All other downloads succeeded in a single attempt:

# Cross-tabulate download attempts: count rows per (platform, attempts,
# server), then spread platforms into columns whose cells hold the number
# of attempts. Per the rendered table below, every (server, n) row is
# unique, so pivot_wider yields scalar cells rather than list columns.
dat |>
  count(platform, attempts, server) |>
  pivot_wider(names_from = platform, values_from = attempts) |>
  relocate(Swarm, IPFS, Arweave, .after = n) |>
  kable()
server n Swarm IPFS Arweave
Server 1 150 15 1 1
Server 2 150 1 1 1
Server 3 150 1 1 1

So everything in the data looks OK at first glance except for the (Swarm, Server 1) combination.

Preliminary analysis

Plotting the raw results, we get:

# Density of retrieval times (log x-axis) per platform, faceted by
# server (rows) and file size (columns). Only verified downloads count.
density_dat <- dat |>
  filter(sha256_match) |>
  select(platform, size_kb, server, time_sec) |>
  # Order platform levels by typical retrieval time for a tidy legend:
  mutate(platform = fct_reorder(platform, time_sec)) |>
  mutate(size = case_when(
    size_kb ==     1 ~ "1 KB",
    size_kb ==    10 ~ "10 KB",
    size_kb ==   100 ~ "100 KB",
    size_kb ==  1000 ~ "1 MB",
    size_kb == 10000 ~ "10 MB"
  )) |>
  # Order the size labels numerically rather than alphabetically:
  mutate(size = fct_reorder(size, size_kb))

ggplot(density_dat, aes(x = time_sec, color = platform, fill = platform)) +
  geom_density(alpha = 0.2, bw = 0.05) +
  scale_x_log10() +
  scale_color_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
  scale_fill_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
  labs(x = "Retrieval time (seconds)", y = "Density",
       color = "Platform: ", fill = "Platform: ") +
  facet_grid(server ~ size, scales = "fixed") +
  theme_bw() +
  theme(legend.position = "bottom", panel.grid = element_blank())

Here we have retrieval times (on the log scale) along the x-axis and density of incidence along the y-axis. The curves are higher where there are more data. Colors represent the different storage platforms; facet rows are the different servers used, and facet columns are the various data sizes.

At a glance, we see that IPFS is the fastest. For small files, Swarm is faster than Arweave. For 10MB files, it is a bit slower but still comparable. Somewhat strangely, the Swarm distributions look bimodal, even on Server 2 and Server 3 where the downloads succeeded. This should probably be investigated further.

Now we check the relationship between file size and download times, for each unique platform-server combination (removing the faulty (Swarm, Server 1) data, of course):

mergePlatformServer <- function(dat) {
  # Give platforms an explicit level order so sorting (and the derived
  # factor below) runs Swarm, IPFS, Arweave rather than alphabetically.
  dat |>
    mutate(platform = fct_relevel(platform, "Swarm", "IPFS", "Arweave")) |>
    arrange(platform, server, size_kb) |>
    # Combined "platform, server" label for faceting; as_factor() keeps
    # the levels in order of appearance, i.e. the sort order above:
    mutate(plat_serv = as_factor(paste(platform, server, sep = ", ")))
}

plotPlatformServerFit <- function(dat, x, y, formula = y ~ x, method = lm,
                                  log_y = TRUE) {
  # Scatter plot of {{x}} vs {{y}} with a smoothed fit, one facet per
  # platform-server combination (`plat_serv` column must be present).
  # The x-axis is always log10; the y-axis is log10 when log_y is TRUE
  # and linear otherwise.
  p <- ggplot(dat, aes(x = {{x}}, y = {{y}})) +
    geom_point(color = "steelblue", alpha = 0.5) +
    geom_smooth(method = method, color = "goldenrod", fill = "goldenrod",
                formula = formula) +
    scale_x_log10() +
    labs(x = "File size (KB)", y = "Download time (seconds)") +
    facet_wrap(~ plat_serv, scales = "free_y") +
    theme_bw()
  if (log_y) {
    p + scale_y_log10()
  } else {
    p + scale_y_continuous()
  }
}

# Size-vs-time fits on a linear y-axis. The faulty downloads recorded a
# time of 0; blank them out as NA so they do not distort the fits:
dat |>
  mergePlatformServer() |>
  mutate(time_sec = ifelse(sha256_match, time_sec, NA)) |>
  plotPlatformServerFit(size_kb, time_sec, log_y = FALSE)

These data have a clear increasing trend. They are also manifestly nonlinear, so fitting linear functions is unlikely to do well. Instead, let us try to fix this by putting download times on the log scale as well:

# Same fits, but with download times on the log scale as well. The
# faulty zero-time downloads are again blanked out as NA:
dat |>
  mergePlatformServer() |>
  mutate(time_sec = ifelse(sha256_match, time_sec, NA)) |>
  plotPlatformServerFit(size_kb, time_sec, log_y = TRUE)

This is a lot better, although arguably the relationships are still somewhat nonlinear. That said, let us analyze this pattern further by performing a linear regression for each platform-server combination:

# Fit, for each (platform, server) combination, a linear regression of
# log10 download time on log10 file size, keeping only verified downloads.
regressionDat <- dat |>
  filter(sha256_match) |>
  mutate(predictor = log10(size_kb), response = log10(time_sec)) |>
  # Nest everything except the grouping columns into one table per group:
  nest(data = !platform & !server) |>
  mutate(fit = map(data, \(dat) lm(response ~ predictor, data = dat))) |>
  # Tidy coefficient table (intercept + slope rows) for each fit,
  # expanded so that each coefficient gets its own row:
  mutate(regtab = map(fit, broom::tidy)) |>
  unnest(regtab)

Then we can inspect the regression statistics both for the intercepts and the slopes:

# Regression statistics for all fits, with readable term names:
regressionDat |>
  select(-data, -fit) |>
  mutate(term = ifelse(term == "(Intercept)", "intercept", "slope")) |>
  arrange(term, platform, server) |>
  kable()
platform server term estimate std.error statistic p.value
Arweave Server 1 intercept 0.0125603 0.0105703 1.188266 0.236632
Arweave Server 2 intercept 0.1716536 0.0056452 30.406813 0.000000
Arweave Server 3 intercept 0.2216665 0.0122245 18.133022 0.000000
IPFS Server 1 intercept -0.7221360 0.0308886 -23.378736 0.000000
IPFS Server 2 intercept -1.0486442 0.0318166 -32.959038 0.000000
IPFS Server 3 intercept -1.2726393 0.0126825 -100.346280 0.000000
Swarm Server 2 intercept -0.6917008 0.0433090 -15.971303 0.000000
Swarm Server 3 intercept -0.7425320 0.0470355 -15.786638 0.000000
Arweave Server 1 slope 0.0972232 0.0043153 22.529853 0.000000
Arweave Server 2 slope 0.0546925 0.0023047 23.731274 0.000000
Arweave Server 3 slope 0.0700226 0.0049906 14.030858 0.000000
IPFS Server 1 slope 0.2271200 0.0126102 18.010803 0.000000
IPFS Server 2 slope 0.1642424 0.0129891 12.644666 0.000000
IPFS Server 3 slope 0.2057088 0.0051776 39.730539 0.000000
Swarm Server 2 slope 0.3540662 0.0176808 20.025443 0.000000
Swarm Server 3 slope 0.3630591 0.0192022 18.907212 0.000000

All parameters are significantly different from zero except for the intercept of Arweave on Server 1. To check how well the assumptions behind linear regression are fulfilled (and thus how much one can trust these results), we make diagnostic plots:

# Diagnostic plots for each platform-server linear fit. regressionDat has
# one row per coefficient, so dropping the "(Intercept)" rows leaves
# exactly one row (the slope row) per fit, avoiding duplicate plots.
regressionDat |>
  filter(term != "(Intercept)") |>
  mutate(platform = fct_relevel(platform, "Swarm", "IPFS", "Arweave")) |>
  arrange(platform, server) |>
  # ggfortify's autoplot() on an lm returns a multi-panel object holding
  # the standard regression diagnostic plots:
  mutate(diagnostics = map(fit, \(x) {
    autoplot(x, smooth.colour = NA, alpha = 0.3, colour = "steelblue") +
      theme_bw()
  } )) |>
  # Lay out each fit's panels in one grid, titled "platform, server"
  # (the panels live in the S4 @plots slot):
  mutate(diagnostics = pmap(list(diagnostics, platform, server), \(dia, sto, se) {
    gridExtra::grid.arrange(grobs = dia@plots, top = str_c(sto, ", ", se))
  } )) |>
  # grid.arrange() draws as a side effect; swallow the pipeline's printed
  # value and messages so only the plots appear in the rendered document:
  suppressMessages() |>
  capture.output() |>
  invisible()

Most of these diagnostics look acceptable, although the residuals clearly do depend on fitted values quite often. This is the case for both Server 2 and Server 3 on Swarm. The same is true for IPFS Server 2 and Arweave Server 1. In both cases, there are additionally large outliers distorting the results. In the other cases, the diagnostics look fine.

Building a predictive model

The above linear model convincingly establishes a positive relationship between log file size and log download times that is not simply due to chance. But it might not be the best model for prediction, because the relationship is manifestly nonlinear, yet the fitted curve is a linear function.

One possible improvement is to try fitting a quadratic or cubic curve. In fact, a suite of models will be fit below, and we will employ model selection to try and choose the best. The models are:

  • Quadratic (quad), with the formula \(y_i = \beta_0 + \beta_1 x_i + \beta_2 x_i^2 + \varepsilon_i\). Here \(y_i\) is the \(i\)th log download time (all logarithms are base 10), \(x_i\) is the \(i\)th log file size, the \(\beta_i\) are the regression coefficients, and \(\varepsilon_i\) is the residual variation.
  • Quadratic, but without the linear term (quadsimp). It is the same as quad but with \(\beta_1\) set to zero: \(y_i = \beta_0 + \beta_2 x_i^2 + \varepsilon_i\). This model is therefore less flexible, but also has fewer parameters, which reduces the possibility of overfitting.
  • Exponential (exp), with \(y_i = \beta_0 + \beta_1 \exp(x_i) + \varepsilon_i\)

Visually, all these models fit the data reasonably:

plotWithFormula <- function(dat, formula = y ~ x) {
  # Helper: NA out the failed (zero-time) downloads, then draw the
  # platform-server facets with the requested smoothing formula.
  cleaned <- dat |>
    mergePlatformServer() |>
    mutate(time_sec = ifelse(sha256_match, time_sec, NA))
  plotPlatformServerFit(cleaned, size_kb, time_sec, formula = formula)
}

# Quadratic model (quad):
plotWithFormula(dat, y ~ x + I(x^2))

# Quadratic without the linear term (quadsimp):
plotWithFormula(dat, y ~ I(x^2))

# Exponential model (exp):
plotWithFormula(dat, y ~ exp(x))

Let us compare these models. We will fit them all and extract relevant regression statistics, then compare their AIC scores to perform model selection:

# Fit all four candidate models to every platform-server combination,
# collecting tidy coefficients and fit-quality statistics for each fit.
modelComparison <- dat |>
  mutate(platform = fct_relevel(platform, "Swarm", "IPFS", "Arweave")) |>
  arrange(platform, server, size_kb) |>
  mutate(plat_serv = as_factor(str_c(platform, ", ", server))) |>
  filter(sha256_match) |>
  # Work on the log-log scale throughout:
  mutate(x = log10(size_kb), y = log10(time_sec)) |>
  select(plat_serv, x, y) |>
  # Pair every group's data with every candidate model formula:
  crossing(formula = list(
    "linear"    = formula(y ~ x),
    "quad"      = formula(y ~ x + I(x^2)),
    "quadsimp"  = formula(y ~ I(x^2)),
    "exp"       = formula(y ~ exp(x))
  )) |>
  # The list names carry through crossing() and label the models
  # (confirmed by the model column in the tables below):
  mutate(model = names(formula)) |>
  nest(data = x | y) |>
  mutate(fit = map2(formula, data, lm)) |>
  # Per-fit coefficient tables and quality summaries (AIC etc.):
  mutate(regression = map(fit, tidy),
         quality = map(fit, glance)) |>
  separate(plat_serv, into = c("platform", "server"), sep = ", ")

We work with AIC because it is usually favored when prediction is the goal (as opposed to inference, for which BIC is more appropriate). Here is a table with the AIC scores:

# AIC score of every model for every platform-server combination,
# with the models spread into columns:
modelComparison |>
  unnest(quality) |>
  select(platform, server, model, AIC) |>
  pivot_wider(names_from = model, values_from = AIC) |>
  kable()
platform server linear quad quadsimp exp
Swarm Server 2 74.65295 -24.45972 -14.61966 -18.30025
Swarm Server 3 99.41558 51.70865 50.68415 66.33704
IPFS Server 1 -26.73906 -108.95065 -99.92618 -98.67160
IPFS Server 2 -17.85868 -135.50458 -88.71638 -100.76498
IPFS Server 3 -293.78867 -410.59626 -397.74715 -240.91414
Arweave Server 1 -348.44062 -474.17289 -462.64087 -487.35987
Arweave Server 2 -536.61144 -569.06277 -568.41676 -508.01725
Arweave Server 3 -304.82322 -317.63970 -319.45885 -302.36202

Finding the best models, based on AIC:

# Within each platform-server group, keep the model with the lowest AIC:
modelComparison |>
  unnest(quality) |>
  select(platform, server, model, AIC) |>
  filter(AIC == min(AIC), .by = c(platform, server)) |>
  kable()
platform server model AIC
Swarm Server 2 quad -24.45972
Swarm Server 3 quadsimp 50.68415
IPFS Server 1 quad -108.95065
IPFS Server 2 quad -135.50458
IPFS Server 3 quad -410.59626
Arweave Server 1 exp -487.35987
Arweave Server 2 quad -569.06277
Arweave Server 3 quadsimp -319.45885

The best model is the quad one in most cases. Even in the cases when it isn’t, its AIC score is not far above that of the best model. This suggests adopting this model for prediction across the board. Later on we’ll also look at the predictions made by quadsimp; as we will see, it makes almost the same ones as quad.

Let us check the diagnostic plots for the quadratic model quad:

# Diagnostic plots for every platform-server fit of the quadratic model.
modelComparison |>
  filter(model == "quad") |>
  # Four standard diagnostic panels per fit, via ggfortify's autoplot():
  mutate(diagnostics = map(fit, \(x) {
    autoplot(x, smooth.colour = NA, alpha = 0.3, colour = "steelblue") +
      theme_bw()
  } )) |>
  # Lay out each fit's panels, titled by platform and server. The lambda
  # now takes exactly the three pmap inputs (the original declared an
  # unused fourth argument that pmap never supplied), and the title says
  # "quadratic model" instead of the incorrect "cubic model":
  mutate(diagnostics = pmap(list(diagnostics, platform, server),
                            \(dia, pf, sv) {
    gridExtra::grid.arrange(grobs = dia@plots,
                            top = str_c(pf, ", ", sv, ", quadratic model"))
  } )) |>
  # Swallow the printed pipeline value and messages so only the plots show:
  suppressMessages() |>
  capture.output() |>
  invisible()

Not always amazing, especially the quantile-quantile plot for IPFS, Server 1. That said, here are the regression results:

# Coefficient estimates for the quadratic model, with readable beta names:
modelComparison |>
  filter(model == "quad") |>
  unnest(regression) |>
  mutate(term = case_match(
    term,
    "(Intercept)" ~ "beta_0",
    "x" ~ "beta_1",
    "I(x^2)" ~ "beta_2"
  )) |>
  select(platform, server, term, estimate, std.error, statistic, p.value) |>
  kable()
platform server term estimate std.error statistic p.value
Swarm Server 2 beta_0 -0.4370973 0.0376917 -11.5966470 0.0000000
Swarm Server 2 beta_1 -0.1551407 0.0446488 -3.4746920 0.0006724
Swarm Server 2 beta_2 0.1273017 0.0107037 11.8932184 0.0000000
Swarm Server 3 beta_0 -0.5328200 0.0485860 -10.9665446 0.0000000
Swarm Server 3 beta_1 -0.0563647 0.0575539 -0.9793382 0.3290219
Swarm Server 3 beta_2 0.1048560 0.0137975 7.5996436 0.0000000
IPFS Server 1 beta_0 -0.5521749 0.0284402 -19.4153010 0.0000000
IPFS Server 1 beta_1 -0.1128020 0.0336896 -3.3482709 0.0010330
IPFS Server 1 beta_2 0.0849805 0.0080765 10.5219832 0.0000000
IPFS Server 2 beta_0 -0.8506275 0.0260311 -32.6774074 0.0000000
IPFS Server 2 beta_1 -0.2317909 0.0308358 -7.5169356 0.0000000
IPFS Server 2 beta_2 0.0990083 0.0073923 13.3933988 0.0000000
IPFS Server 3 beta_0 -1.1938889 0.0104053 -114.7382135 0.0000000
IPFS Server 3 beta_1 0.0482081 0.0123259 3.9111100 0.0001399
IPFS Server 3 beta_2 0.0393752 0.0029549 13.3253196 0.0000000
Arweave Server 1 beta_0 0.0797464 0.0084182 9.4730916 0.0000000
Arweave Server 1 beta_1 -0.0371489 0.0099720 -3.7253139 0.0002775
Arweave Server 1 beta_2 0.0335930 0.0023906 14.0520845 0.0000000
Arweave Server 2 beta_0 0.1931221 0.0061355 31.4760261 0.0000000
Arweave Server 2 beta_1 0.0117555 0.0072680 1.6174336 0.1079286
Arweave Server 2 beta_2 0.0107342 0.0017424 6.1606966 0.0000000
Arweave Server 3 beta_0 0.2531398 0.0141848 17.8458048 0.0000000
Arweave Server 3 beta_1 0.0070761 0.0168030 0.4211214 0.6742815
Arweave Server 3 beta_2 0.0157366 0.0040282 3.9065947 0.0001423

Plotting the estimates, to see if they are consistent across servers:

# Plot the quad coefficient estimates per platform, one point per server,
# to judge how consistent the parameters are across servers.
modelComparison |>
  filter(model == "quad") |>
  mutate(platform = fct_relevel(platform, "Swarm", "IPFS", "Arweave")) |>
  unnest(regression) |>
  # Plotmath-style labels so the axis shows subscripted betas:
  mutate(term = case_match(
    term,
    "(Intercept)" ~ "beta[0]",
    "x" ~ "beta[1]",
    "I(x^2)" ~ "beta[2]"
  )) |>
  select(platform, server, term, estimate) |>
  ggplot(aes(x = term, y = estimate)) +
  geom_point(alpha = 0.7, color = "steelblue") +
  facet_grid(~ platform) +
  scale_x_discrete(labels = parse_format()) +
  theme_bw()

While there is some spread, maybe it is not a great mistake to take the average of the parameters and treat them as “the” parameter for one platform. Let us compute these averages:

# Average the quad-model coefficients across servers, giving one set of
# parameters (b0, b1, b2) per platform:
paramTab <- modelComparison |>
  filter(model == "quad") |>
  unnest(regression) |>
  select(platform, server, term, estimate) |>
  summarize(estimate = mean(estimate), .by = c(platform, term)) |>
  mutate(term = case_match(
    term,
    "(Intercept)" ~ "b0",
    "x" ~ "b1",
    "I(x^2)" ~ "b2"
  )) |>
  pivot_wider(names_from = term, values_from = estimate)

# Render the table with LaTeX beta headers; escape = FALSE keeps the
# math markup intact, and HOLD_position pins the table in place:
paramTab |>
  rename(`$\\beta_0$` = b0, `$\\beta_1$` = b1, `$\\beta_2$` = b2) |>
  kable(escape = FALSE, format = "latex") |>
  kable_styling(latex_options = "HOLD_position")

So we have the following models:

Swarm \(y_i = -0.485 - 0.106 x_i + 0.116 x_i^2\)
IPFS \(y_i = -0.866 - 0.0988 x_i + 0.0745 x_i^2\)
Arweave \(y_i = 0.175 - 0.00611 x_i + 0.02 x_i^2\)

This predicts that for small file sizes, IPFS is best (smallest \(\beta_0\)), and Swarm is in the middle. In turn, for very large files, Arweave will be best (smallest \(\beta_2\)) and IPFS second best. To see where the curves take over one another, we can plot all three of them:

# Evaluate the fitted quadratic on the log-log scale.
# s:          log10 of the file size in KB (may be a vector).
# b0, b1, b2: intercept, linear, and quadratic coefficients.
# Returns log10 of the predicted download time in seconds.
qfun <- function(s, b0, b1, b2) {
  b0 + b1 * s + b2 * s^2
}

# Plot each platform's averaged quad curve over log10 file sizes 0..7
# (1 KB up to 10 GB), back-transforming both axes to natural units.
paramTab |>
  mutate(platform = as_factor(platform)) |>
  mutate(curve = pmap(list(b0, b1, b2), \(b0, b1, b2) {
    tibble(x = seq(0, 7, length.out = 101)) |>
      mutate(y = qfun(x, b0, b1, b2))
  })) |>
  unnest(curve) |>
  mutate(x = 10^x, y = 10^y) |>
  ggplot(aes(x = x, y = y, color = platform)) +
  geom_line(linewidth = 1) +
  scale_x_log10(breaks = 10^c(0, 3, 6), labels = c("1KB", "1MB", "1GB")) +
  scale_y_log10(breaks = c(1, 60, 3600), labels = c("1s", "1m", "1h")) +
  scale_color_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
  labs(x = "File size", y = "Predicted download time", color = NULL) +
  theme_bw()

Incidentally: as advertised above, we can create the same plot using the quadsimp model instead. The predictions do not change appreciably:

# Same prediction plot, built from the quadsimp (no linear term) model.
modelComparison |>
  filter(model == "quadsimp") |>
  unnest(regression) |>
  select(platform, server, term, estimate) |>
  summarize(estimate = mean(estimate), .by = c(platform, term)) |>
  # NB: in this model "b1" holds the *quadratic* coefficient (I(x^2)),
  # because quadsimp has no linear term:
  mutate(term = case_match(
    term,
    "(Intercept)" ~ "b0",
    "I(x^2)" ~ "b1"
  )) |>
  pivot_wider(names_from = term, values_from = estimate) |>
  mutate(platform = as_factor(platform)) |>
  # Evaluate the curve over log10 sizes 0..7 (1 KB to 10 GB), passing 0
  # as qfun's linear coefficient:
  mutate(curve = pmap(list(b0, b1), \(b0, b1) {
    tibble(x = seq(0, 7, l = 101)) |>
      mutate(y = qfun(x, b0, 0, b1))
  } )) |>
  unnest(curve) |>
  # Back-transform both axes from log10 to natural units:
  mutate(x = 10^x, y = 10^y) |>
  ggplot(aes(x = x, y = y, color = platform)) +
  geom_line(linewidth = 1) +
  labs(x = "File size", y = "Predicted download time", color = NULL) +
  scale_x_log10(breaks = 10^c(0, 3, 6), labels = c("1KB", "1MB", "1GB")) +
  scale_y_log10(breaks = c(1, 60, 3600), labels = c("1s", "1m", "1h")) +
  scale_color_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
  theme_bw()

Plotting the two model predictions together, to show how close they are:

# Overlay the quad and quadsimp predictions to show how close they are.
modelComparison |>
  filter(model %in% c("quad", "quadsimp")) |>
  unnest(regression) |>
  select(platform, server, model, term, estimate) |>
  summarize(estimate = mean(estimate),
            .by = c(platform, term, model)) |>
  mutate(term = case_match(
    term,
    "(Intercept)" ~ "b0",
    "x" ~ "b1",
    "I(x^2)" ~ "b2"
  )) |>
  # quadsimp has no "x" term, so its absent b1 is filled in as zero:
  pivot_wider(names_from = term, values_from = estimate,
              values_fill = 0) |>
  mutate(platform = as_factor(platform)) |>
  # Evaluate each model's curve over log10 sizes 0..7 (1 KB to 10 GB):
  mutate(curve = pmap(list(b0, b1, b2), \(b0, b1, b2) {
    tibble(x = seq(0, 7, l = 101)) |>
      mutate(y = qfun(x, b0, b1, b2))
  } )) |>
  unnest(curve) |>
  # Back-transform both axes from log10 to natural units:
  mutate(x = 10^x, y = 10^y) |>
  ggplot(aes(x = x, y = y, color = platform, linetype = model)) +
  geom_line(linewidth = 1) +
  labs(x = "File size", y = "Predicted download time",
       color = "Platform", linetype = "Model") +
  scale_x_log10(breaks = 10^c(0, 3, 6), labels = c("1KB", "1MB", "1GB")) +
  scale_y_log10(breaks = c(1, 60, 3600), labels = c("1s", "1m", "1h")) +
  scale_color_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
  theme_bw()

Finally, a quick note: it is unlikely that the model is realistic for very large files. For instance, we see that Swarm takes between about 10 minutes (based on quadsimp) and 20 minutes (based on quad) to download 1GB of data. But it is unlikely that downloading 100GB would take 2.44 days (!), as the quadsimp model would predict.